package com.catmiao.rdd.operate.transform;

import com.google.common.collect.Lists;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import scala.Tuple2;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;

/**
 * Word-count example built from Spark RDD transformations.
 *
 * <p>Reads the text file at {@code datas/words}, splits every line on single spaces, counts how
 * often each non-empty word occurs and prints the resulting {@code (word, count)} pairs to
 * standard output.
 *
 * @author ChengMiao
 * @title: Transfer_12_wordcount
 * @projectName spark_study
 * @description: counts word occurrences with flatMap, filter, keyBy, mapValues and reduceByKey
 * @date 2024/11/25 16:27
 */
public class Transfer_12_wordcount {

    /**
     * Runs the word count on a local Spark context and prints one line per distinct word.
     *
     * @param args command-line arguments; not used
     * @throws InterruptedException not thrown by the current body; kept so the signature is unchanged
     */
    public static void main(String[] args) throws InterruptedException {

        final SparkConf conf = new SparkConf();
        conf.setAppName("appName");
        conf.setMaster("local[*]");

        // try-with-resources stops the context even when one of the jobs below fails.
        try (JavaSparkContext jsc = new JavaSparkContext(conf)) {

            JavaRDD<String> lines = jsc.textFile("datas/words");

            // One element per space-separated token of every line.
            JavaRDD<String> tokens = lines.flatMap(line -> Arrays.asList(line.split(" ")).iterator());

            // Blank lines and leading/repeated spaces produce empty tokens; those are not words.
            JavaRDD<String> words = tokens.filter(token -> !token.isEmpty());

            // Key each word by itself and give every occurrence an initial count of 1.
            JavaPairRDD<String, Integer> ones = words.keyBy(word -> word).mapValues(word -> 1);

            // reduceByKey sums partial counts per partition before shuffling, instead of
            // shipping every single occurrence to the reducer as groupBy would.
            JavaPairRDD<String, Integer> counts = ones.reduceByKey(Integer::sum);

            counts.collect().forEach(System.out::println);
        }
    }
}
